2  Baseline Relatedness

Purpose: estimate relatedness within baseline populations

Notes:

2.1 Load relatedness

Code
base_demerel <- read_rds("/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Baseline Relatedness/UpperSnakeRiver_GTseq_Baseline_Relatedness.rds")
par(mar = c(4,4,0.5,0.5), mgp = c(2.5,1,0))
hist(unlist(base_demerel[[1]]$Empirical_Relatedness), xlab = "Wang relatedness coefficient", main = "")

Frequency histogram of Wang relatedness statistics for a single population (Cliff Creek)

Convert Wang relatedness estimates to csv

Code
pops <- names(base_demerel[[1]]$Empirical_Relatedness)
wanglist <- list()
for (i in 1:length(pops)) {
  wanglist[[i]] <- tibble(repunit = pops[i],
                          compar = names(unlist(base_demerel[[1]]$Empirical_Relatedness[i])),
                          wang = unlist(base_demerel[[1]]$Empirical_Relatedness[i]))
}
wangtib <- do.call(rbind, wanglist)
wangtib <- wangtib %>% 
  mutate(tmp = str_split(compar, "[.]")) %>% 
  mutate(compar2 = map_chr(tmp, 2)) %>% 
  select(-tmp) %>%
  mutate(tmp = str_split(compar2, "_")) %>%
  mutate(indiv1 = paste(map_chr(tmp, 1), map_chr(tmp, 2), sep = "_"),
         indiv2 = paste(map_chr(tmp, 3), map_chr(tmp, 4), sep = "_")) %>%
  select(-tmp) %>%
  select(repunit, compar2, indiv1, indiv2, wang) %>%
  rename(compar = compar2)
str(wangtib)
tibble [34,156 × 5] (S3: tbl_df/tbl/data.frame)
 $ repunit: chr [1:34156] "bacon_NA" "bacon_NA" "bacon_NA" "bacon_NA" ...
 $ compar : chr [1:34156] "OclVARI23G_0550_OclVARI23G_0551" "OclVARI23G_0550_OclVARI23G_0552" "OclVARI23G_0551_OclVARI23G_0552" "OclVARI23G_0550_OclVARI23G_0553" ...
 $ indiv1 : chr [1:34156] "OclVARI23G_0550" "OclVARI23G_0550" "OclVARI23G_0551" "OclVARI23G_0550" ...
 $ indiv2 : chr [1:34156] "OclVARI23G_0551" "OclVARI23G_0552" "OclVARI23G_0552" "OclVARI23G_0553" ...
 $ wang   : Named num [1:34156] -0.0464 0.38679 0.07555 -0.06005 0.00333 ...
  ..- attr(*, "names")= chr [1:34156] "bacon_NA.OclVARI23G_0550_OclVARI23G_0551" "bacon_NA.OclVARI23G_0550_OclVARI23G_0552" "bacon_NA.OclVARI23G_0551_OclVARI23G_0552" "bacon_NA.OclVARI23G_0550_OclVARI23G_0553" ...
Code
write_csv(wangtib, "/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Baseline Relatedness/UpperSnakeRiver_GTseq_Baseline_Relatedness_Wang.csv")

How many pairs of individuals exceed relatedness threshold of 0.4? (sensu Kallo et al. 2023)

Code
dim(wangtib %>% filter(wang >= 0.4))[1]
[1] 967

2.2 Filter by relatedness

What might we expect the relatedness coefficient to be for full siblings?

Code
# view full sib simulations
range(base_demerel[[1]]$Randomized_Populations_for_Relatedness_Statistics[[1]]$Randomized_Fullssibs)
[1] 0.3520966 0.6464401
Code
par(mar = c(4,4,1,1))
boxplot(base_demerel[[1]]$Randomized_Populations_for_Relatedness_Statistics[[1]]$Randomized_Fullssibs, ylab = "Wang relatedness coef.")

2.3 Identify full siblings

Identify full siblings and create list of full sibs to drop, retaining two full sibs per family (see recommendations in Ostergren et al 2020 Mol Ecol Res).

Code
# pull out only highly related individuals (full siblings)
wt2 <- wangtib %>% filter(wang >= 0.4) # 0.4 is ~ conservative given the simulated/randomied full sib stats above

# create full sibling famlies
group_pairs <- function(vector_1, vector_2) {
  groups_of_ids <- list()
  df <- cbind(vector_1, vector_2)
  ids_grouped <- c()
  for (id_group in unique(unlist(df))) {
    if (id_group %in% ids_grouped) {
      next
    }
    id <- id_group
    while (length(id) > 0) {
      newid <- unique(unlist(df[which(df[, 1] %in% id | df[, 2] %in% id), ]))
      id <- newid[!newid %in% id_group]
      id_group <- c(id_group, id)
    }
    ids_grouped <- c(ids_grouped, id_group)
    groups_of_ids[[length(groups_of_ids) + 1]] <- sort(unique(id_group))
  }
  return(groups_of_ids)
}
full_sibs_list <- group_pairs(wt2$indiv1, wt2$indiv2)
full_sibs <- unlist(full_sibs_list)

# these should match
# length(unique(c(wt2$indiv1, wt2$indiv2))) # number of unique individuals from pairwise Wang estimates, where wang >= 0.4
# length(unlist(full_sibs)) # number of unique individuals from list of full-sib families

# extract 2 sibling from each full-sib family 
fam_rep <- c(sapply(full_sibs_list, "[[", 1), sapply(full_sibs_list, "[[", 2))

# identify individuals to drop
drop_sibs <- full_sibs[!full_sibs %in% fam_rep]
write_csv(tibble(drop_sibs), "/Users/jeffbaldock/Library/CloudStorage/GoogleDrive-jbaldock@uwyo.edu/Shared drives/wyo-coop-baldock/UWyoming/Snake River Cutthroat/Analyses/Snake River GSI Quarto/Baseline Relatedness/UpperSnakeRiver_GTseq_Baseline_DropFullSiblings.csv")